# Evaluate all chunks from the project root, so the relative paths defined
# below are resolved against this directory.
knitr::opts_knit$set(
  root.dir = "D:/BIOINF/PROJECTS/2_ANALYSES/18_GIT_PAPERS/TCR_BCR_antiPD1"
)
# Recursively delete this script's "_cache" directory (presumably the knitr
# chunk cache), so the run starts without stale cached results.
unlink(
  "scripts/main/A_05_get_TCGA_biospecimen_RNA_cache",
  recursive = TRUE
)
#####################
### Load packages ###
#####################
library(pacman)
# Attach every package used in this script in a single call.
pacman::p_load(TCGAbiolinks, dplyr, parallel, DT, extrafont)
# Font registration for the Windows graphics device. windowsFonts() only
# exists on Windows, so these calls are guarded to keep the script runnable on
# other platforms. The fonts are registered once (the second, identical
# loadfonts(device = "win") call was redundant).
if (.Platform$OS.type == "windows") {
  extrafont::loadfonts(device = "win")
  windowsFonts(sans = "Palatino Linotype")
}
# Register the fonts for the PostScript device as well.
loadfonts(device = "postscript")
###########################
### Main analysis paths ###
###########################
# Every path keeps its trailing slash because file names are appended to it
# with paste0() further down.

# Main
scriptsPath <- "scripts/"
scriptsFunctionsPath <- paste0(scriptsPath, "functions/")
projectDataPath <- "data/"

# Input
tcgaInputData <- paste0(projectDataPath, "TCGA/")
otherInputData <- paste0(projectDataPath, "other/")
referenceInputData <- paste0(projectDataPath, "reference/")

# Output
projectOutDataPath <- "output/data_files/"
tcgaIntermediateData <- paste0(projectOutDataPath, "TCGA/")

# Session/dependencies
sessionInfoPath <- "session_info/"
######################################
## Create intermediate output paths ##
######################################
# tcgaIntermediateData is a nested path ("output/data_files/TCGA/"), so the
# parent directories have to be created too. Without recursive = TRUE,
# dir.create() fails (with a warning) when "output/data_files/" is missing.
if (!dir.exists(tcgaIntermediateData)) {
  dir.create(tcgaIntermediateData, recursive = TRUE)
}
##################################
### LOAD SOURCE FUNCTIONS FILE ###
##################################
# Project helper files. Presumably these provide tcga_replicateFilter(),
# loadRData(), the get_biospecimen_tcga.*() downloaders and the
# process_biospecimenData*() functions used below -- TODO confirm.
source(paste0(scriptsFunctionsPath, "tcgaReplicateFilter.R"))
source(paste0(scriptsFunctionsPath, "get_TCGA_biospecimen_RNA_functions.R"))
#####################
### File suffixes ###
#####################
Rdata.suffix <- ".RData"
########################
### Script variables ###
########################
# NOTE(review): run_clean is not referenced in the visible part of this
# script; confirm whether it is still needed.
run_clean <- TRUE
The goal is to get all available biospecimen data (legacy and harmonized) for all TCGA projects. After downloading the data and merging the legacy and the harmonized data separately, we filter the aliquot replicates in order to get one aliquot/sample per patient. The replicated data are kept so that they can later be merged with the HTSEQ-FPKM-UQ TCGA data, since there were inconsistencies in the availability of aliquots.
The final data table used in this analysis consists of columns such as analyte, sample, patient barcode, TCGA project, sample type, TuTACK signature score and T-cell-inflamed GEP score (Ayers et al.). Again, since these are TCGA data, there are multiple aliquots for some of the patients' samples. To deal with this, the Broad Institute recommends taking the sample with the highest lexicographical sort value for the plate number (the penultimate segment of the full TCGA barcode). To do this, I used an updated version of the tcga_replicateFilter function, created by ShixiangWang and made available on GitHub (https://github.com/ShixiangWang/Scripts/blob/master/TCGA_operation.R).
Use the function that applies the analyte and sort replicate filters on the aliquot barcodes of TCGA data. For RNA seq:
Note: We download data for both primary solid tumor and solid normal tissue. So after filtering, we expect to have a minimum of one aliquot/sample per patient, but we can also have two, one for tumor and one for normal.
# Load the TCGA MSI table from the TCGA input folder; it is handed to the
# process_biospecimenData*() calls further down.
msi_tcga_all_f <- loadRData(
  paste0(tcgaInputData, "msi_tcga_all", Rdata.suffix)
)
Due to the archiving of GDC legacy data (see here), the code chunk below no longer works. The downloaded TCGA biospecimen data can be found in the TCGA input data folder.
# Legacy and harmonized ("non legacy") biospecimen data differ, so both are
# downloaded and stored separately; they are merged later in the script.
###################################################################
# getGDCprojects() is exported by TCGAbiolinks, so `::` is sufficient.
tcga_projects <- TCGAbiolinks::getGDCprojects()$project_id
# Keep only the TCGA projects, sorted alphabetically.
mycancertypes <- sort(tcga_projects[grepl("^TCGA-", tcga_projects)]) # total 33
# Empty, typed templates that are exported to the workers together with the
# download helpers. Both templates have the same columns.
biospecimen_tcga.legacy <- data.frame(
  bcr_sample_barcode = character(),
  sample_type_id = numeric(),
  sample_type = character(),
  project = character(),
  bcr_aliquot_barcode = character(),
  analyte_type_id = character(),
  bcr_analyte_barcode = character(),
  bcr_patient_barcode = character(),
  stringsAsFactors = FALSE
)
biospecimen_tcga.nonlegacy <- biospecimen_tcga.legacy
################
### DOWNLOAD ###
################
#--------#
# LEGACY #
#--------#
# Leave one core free, but never ask for fewer than one worker
# (detectCores() may return 1 or NA).
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(
  no_cores,
  outfile = paste0(tcgaInputData, "extract_biospecimenLegacyData.txt")
)
clusterEvalQ(cl, c(library(TCGAbiolinks), library(dplyr)))
# Export the download helper and its empty template to the workers
clusterExport(cl, c("get_biospecimen_tcga.legacy", "biospecimen_tcga.legacy"))
# Get data for all cancers, parallelized.
# FOR NOW REMOVING PAAD,ESCA... since there is a problem/corrupted gdc files
# The cluster is stopped even when a download fails.
biospecimen.legacy <- tryCatch(
  parSapply(
    cl,
    mycancertypes[!mycancertypes %in% c("TCGA-ESCA", "TCGA-OV", "TCGA-PAAD", "TCGA-UCS")],
    function(x) get_biospecimen_tcga.legacy(selectedcancer = x),
    simplify = FALSE
  ),
  finally = stopCluster(cl)
)
# See for how many cancers we got data
length(biospecimen.legacy)
save(biospecimen.legacy, file = paste0(tcgaInputData, "biospecimen.legacy.all.RData"))
#------------#
# NON LEGACY #
#------------#
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(
  no_cores,
  outfile = paste0(tcgaInputData, "extract_biospecimenNONLegacyData.txt")
)
clusterEvalQ(cl, c(library(TCGAbiolinks), library(dplyr)))
# Export the download helper and its empty template to the workers
clusterExport(cl, c("get_biospecimen_tcga.nonlegacy", "biospecimen_tcga.nonlegacy"))
# Get data for all cancers, parallelized; the cluster is stopped even when a
# download fails.
biospecimen.nonlegacy <- tryCatch(
  parSapply(
    cl,
    mycancertypes,
    function(x) get_biospecimen_tcga.nonlegacy(selectedcancer = x),
    simplify = FALSE
  ),
  finally = stopCluster(cl)
)
# See for how many cancers we got data
length(biospecimen.nonlegacy)
save(biospecimen.nonlegacy, file = paste0(tcgaInputData, "biospecimen.nonlegacy.all.RData"))
All biospecimen data, NT and TP, RNA and DNA, legacy and then harmonized.
# Load the previously downloaded legacy biospecimen data
biospecimen.legacy <- loadRData(
  paste0(tcgaInputData, "biospecimen.legacy.all", Rdata.suffix)
)
# Load the previously downloaded harmonized (non legacy) biospecimen data
biospecimen.nonlegacy <- loadRData(
  paste0(tcgaInputData, "biospecimen.nonlegacy.all", Rdata.suffix)
)
# List of TCGA project ids, sorted alphabetically. getGDCprojects() is
# exported by TCGAbiolinks, so it is called with `::` instead of `:::`.
tcga_projects <- TCGAbiolinks::getGDCprojects()$project_id
mycancertypes <- tcga_projects[grepl("^TCGA-", tcga_projects)] # total 33
mycancertypes <- mycancertypes[order(mycancertypes)]
###########################
# process_biospecimenData() and process_biospecimenData_TM.NT() come from the
# sourced project functions; each receives the downloaded list and the MSI
# table. The *.tpnt objects are described in the text as NT and TP data, the
# *.tmnt objects as "TM" data (presumably metastatic + normal -- TODO confirm).
#--------#
# LEGACY #
#--------#
biospecimen.legacy.tpnt <- process_biospecimenData(
  biospecimen.legacy, msi_tcga_all_f
)
#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tpnt <- process_biospecimenData(
  biospecimen.nonlegacy, msi_tcga_all_f
)
### TM DATA
#--------#
# LEGACY #
#--------#
biospecimen.legacy.tmnt <- process_biospecimenData_TM.NT(
  biospecimen.legacy, msi_tcga_all_f
)
#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tmnt <- process_biospecimenData_TM.NT(
  biospecimen.nonlegacy, msi_tcga_all_f
)
Extract only the data related to RNA-Seq, i.e. keep the rows whose analyte type ID is R, T or H.
####################
### Get only RNA ###
####################
# Rows are kept when analyte_type_id is one of "R", "H" or "T".
# Every *.rna table already ends with distinct(); the *.rna.cl tables apply
# distinct() a second time, so they are expected to hold the same rows.
# NOTE(review): because of that, the *.rna.dup tables below (duplicated rows
# of an already de-duplicated table) are expected to be empty -- confirm
# whether the duplicate check should run before the first distinct().

#--------#
# LEGACY #
#--------#
biospecimen.legacy.tpnt.rna <- biospecimen.legacy.tpnt %>%
  dplyr::filter(analyte_type_id %in% c("R", "H", "T")) %>%
  distinct()
biospecimen.legacy.tpnt.rna.cl <- distinct(biospecimen.legacy.tpnt.rna)
dim(biospecimen.legacy.tpnt.rna.cl)
## [1] 23234 10
# (The duplicate check for the legacy table is disabled in the original.)

#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tpnt.rna <- biospecimen.nonlegacy.tpnt %>%
  dplyr::filter(analyte_type_id %in% c("R", "H", "T")) %>%
  distinct()
biospecimen.nonlegacy.tpnt.rna.cl <- distinct(biospecimen.nonlegacy.tpnt.rna)
dim(biospecimen.nonlegacy.tpnt.rna.cl)
## [1] 27230 10
# Rows flagged as duplicates of an earlier row
biospecimen.nonlegacy.tpnt.rna.dup <- biospecimen.nonlegacy.tpnt.rna[
  duplicated(biospecimen.nonlegacy.tpnt.rna),
]

#########################
##### METASTATIC ########
#--------#
# LEGACY #
#--------#
biospecimen.legacy.tmnt.rna <- biospecimen.legacy.tmnt %>%
  dplyr::filter(analyte_type_id %in% c("R", "H", "T")) %>%
  distinct()
biospecimen.legacy.tmnt.rna.cl <- distinct(biospecimen.legacy.tmnt.rna)
dim(biospecimen.legacy.tmnt.rna.cl)
## [1] 2273 10
# (The duplicate check for the legacy table is disabled in the original.)

#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tmnt.rna <- biospecimen.nonlegacy.tmnt %>%
  dplyr::filter(analyte_type_id %in% c("R", "H", "T")) %>%
  distinct()
biospecimen.nonlegacy.tmnt.rna.cl <- distinct(biospecimen.nonlegacy.tmnt.rna)
dim(biospecimen.nonlegacy.tmnt.rna.cl)
## [1] 2370 10
# Rows flagged as duplicates of an earlier row
biospecimen.nonlegacy.tmnt.rna.dup <- biospecimen.nonlegacy.tmnt.rna[
  duplicated(biospecimen.nonlegacy.tmnt.rna),
]
##~~~~~~~~##
## TABLES ##
##~~~~~~~~##
# Three count tables are built for every data set:
#   *.patient.table      distinct patients per project
#   *.aliq.table         distinct aliquots per project
#   *.aliqPatient.table  distinct aliquots per patient and project
# The count column keeps dplyr's default name `n()`. The aliqPatient tables
# stay grouped by project, since summarise() drops only the last grouping
# level.

#--------#
# LEGACY #
#--------#
biospecimen.legacy.tpnt.rna.patient.table <- biospecimen.legacy.tpnt.rna.cl %>%
  distinct(project, bcr_patient_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.legacy.tpnt.rna.aliq.table <- biospecimen.legacy.tpnt.rna.cl %>%
  distinct(project, bcr_aliquot_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.legacy.tpnt.rna.aliqPatient.table <- biospecimen.legacy.tpnt.rna.cl %>%
  distinct(project, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(project, bcr_patient_barcode) %>%
  summarise(n())

#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tpnt.rna.patient.table <- biospecimen.nonlegacy.tpnt.rna.cl %>%
  distinct(project, bcr_patient_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.nonlegacy.tpnt.rna.aliq.table <- biospecimen.nonlegacy.tpnt.rna.cl %>%
  distinct(project, bcr_aliquot_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.nonlegacy.tpnt.rna.aliqPatient.table <- biospecimen.nonlegacy.tpnt.rna.cl %>%
  distinct(project, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(project, bcr_patient_barcode) %>%
  summarise(n())

###############
### METASTATIC
#--------#
# LEGACY #
#--------#
biospecimen.legacy.tmnt.rna.patient.table <- biospecimen.legacy.tmnt.rna.cl %>%
  distinct(project, bcr_patient_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.legacy.tmnt.rna.aliq.table <- biospecimen.legacy.tmnt.rna.cl %>%
  distinct(project, bcr_aliquot_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.legacy.tmnt.rna.aliqPatient.table <- biospecimen.legacy.tmnt.rna.cl %>%
  distinct(project, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(project, bcr_patient_barcode) %>%
  summarise(n())

#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tmnt.rna.patient.table <- biospecimen.nonlegacy.tmnt.rna.cl %>%
  distinct(project, bcr_patient_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.nonlegacy.tmnt.rna.aliq.table <- biospecimen.nonlegacy.tmnt.rna.cl %>%
  distinct(project, bcr_aliquot_barcode) %>%
  group_by(project) %>%
  summarise(n())
biospecimen.nonlegacy.tmnt.rna.aliqPatient.table <- biospecimen.nonlegacy.tmnt.rna.cl %>%
  distinct(project, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(project, bcr_patient_barcode) %>%
  summarise(n())
# Interactive tables for the legacy data. Every table uses the "Buttons"
# extension (copy / excel / csv export), horizontal scrolling and 15 rows per
# page.
# NOTE(review): the metastatic (tmnt) tables carry the same captions as the
# primary (tpnt) tables -- consider making the captions distinguishable.

# Primary tumor / normal (tpnt)
DT::datatable(
  data = biospecimen.legacy.tpnt.rna.patient.table,
  caption = "Number of patients per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
DT::datatable(
  data = biospecimen.legacy.tpnt.rna.aliq.table,
  caption = "Number of aliquots per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
DT::datatable(
  data = biospecimen.legacy.tpnt.rna.aliqPatient.table,
  caption = "Number of aliquots per patient per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)

# Metastatic (tmnt)
DT::datatable(
  data = biospecimen.legacy.tmnt.rna.patient.table,
  caption = "Number of patients per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
DT::datatable(
  data = biospecimen.legacy.tmnt.rna.aliq.table,
  caption = "Number of aliquots per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
DT::datatable(
  data = biospecimen.legacy.tmnt.rna.aliqPatient.table,
  caption = "Number of aliquots per patient per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# Interactive tables for the harmonized (non legacy) primary tumor / normal
# data. Every table uses the "Buttons" extension (copy / excel / csv export),
# horizontal scrolling and 15 rows per page.
datatable(
  biospecimen.nonlegacy.tpnt.rna.patient.table,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of patients per project (harmonized)"
)
# The caption of this table was missing its closing parenthesis.
datatable(
  biospecimen.nonlegacy.tpnt.rna.aliq.table,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per project (harmonized)"
)
datatable(
  biospecimen.nonlegacy.tpnt.rna.aliqPatient.table,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per patient per project (harmonized)"
)
## Warning in instance$preRenderHook(instance): It seems your data is too big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
Then I merge legacy with harmonized data
## Merge the legacy and the harmonized (non legacy) RNA tables
# The two tables are stacked row-wise; rows that occur in both sources are
# then collapsed with distinct().
biospecimen.tpnt.rna.merged <- rbind(
  biospecimen.legacy.tpnt.rna.cl,
  biospecimen.nonlegacy.tpnt.rna.cl
)
biospecimen.tpnt.rna.merged.cl <- distinct(biospecimen.tpnt.rna.merged)
dim(biospecimen.tpnt.rna.merged.cl)
## [1] 27230 10

#--------#
# MERGED #
#--------#
# Distinct patients per project
biospecimen.tpnt.rna.merged.patient.table <- biospecimen.tpnt.rna.merged.cl %>%
  distinct(project, bcr_patient_barcode) %>%
  group_by(project) %>%
  summarise(n())
# Distinct aliquots per project
biospecimen.tpnt.rna.merged.aliq.table <- biospecimen.tpnt.rna.merged.cl %>%
  distinct(project, bcr_aliquot_barcode) %>%
  group_by(project) %>%
  summarise(n())
# Distinct aliquots per patient and project (result stays grouped by project)
biospecimen.tpnt.rna.merged.aliqPatient.table <- biospecimen.tpnt.rna.merged.cl %>%
  distinct(project, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(project, bcr_patient_barcode) %>%
  summarise(n())

#######################
#### METASTATIC
## Merge the legacy and the harmonized (non legacy) RNA tables
biospecimen.tmnt.rna.merged <- rbind(
  biospecimen.legacy.tmnt.rna.cl,
  biospecimen.nonlegacy.tmnt.rna.cl
)
biospecimen.tmnt.rna.merged.cl <- distinct(biospecimen.tmnt.rna.merged)
dim(biospecimen.tmnt.rna.merged.cl)
## [1] 2372 10

#--------#
# MERGED #
#--------#
# Distinct patients per project
biospecimen.tmnt.rna.merged.patient.table <- biospecimen.tmnt.rna.merged.cl %>%
  distinct(project, bcr_patient_barcode) %>%
  group_by(project) %>%
  summarise(n())
# Distinct aliquots per project
biospecimen.tmnt.rna.merged.aliq.table <- biospecimen.tmnt.rna.merged.cl %>%
  distinct(project, bcr_aliquot_barcode) %>%
  group_by(project) %>%
  summarise(n())
# Distinct aliquots per patient and project (result stays grouped by project)
biospecimen.tmnt.rna.merged.aliqPatient.table <- biospecimen.tmnt.rna.merged.cl %>%
  distinct(project, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(project, bcr_patient_barcode) %>%
  summarise(n())
Afterwards, I filter the replicates following the Broad Institute instructions, using the two filtering steps described above. Filtering is performed separately on the legacy, harmonized and merged data.
##################################
#### POST-FILTERING REPLICATES ###
##################################
##~~~~~~~~~~##
## Filtering##
##~~~~~~~~~~##
# For every data set, tcga_replicateFilter() receives the aliquot barcodes and
# returns the barcodes to keep (RNA analytes, FFPE filtering switched on, full
# barcodes). The table is then restricted to the rows with a kept barcode.
# The "rows:" notes are taken from the dim() output recorded in this document.

#--------#
# LEGACY #
#--------#
biospecimen.legacy.tpnt.rna.barcodes <- tcga_replicateFilter(
  biospecimen.legacy.tpnt.rna.cl$bcr_aliquot_barcode,
  analyte_target = "RNA",
  filter_FFPE = TRUE,
  full_barcode = TRUE
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# rows: 23234 -> 10307
biospecimen.legacy.tpnt.rna.filt <- biospecimen.legacy.tpnt.rna.cl[
  biospecimen.legacy.tpnt.rna.cl$bcr_aliquot_barcode %in%
    biospecimen.legacy.tpnt.rna.barcodes,
  ,
  drop = FALSE
]

#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tpnt.rna.barcodes <- tcga_replicateFilter(
  biospecimen.nonlegacy.tpnt.rna.cl$bcr_aliquot_barcode,
  analyte_target = "RNA",
  filter_FFPE = TRUE,
  full_barcode = TRUE
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# rows: 27230 -> 11368
biospecimen.nonlegacy.tpnt.rna.filt <- biospecimen.nonlegacy.tpnt.rna.cl[
  biospecimen.nonlegacy.tpnt.rna.cl$bcr_aliquot_barcode %in%
    biospecimen.nonlegacy.tpnt.rna.barcodes,
  ,
  drop = FALSE
]

#--------#
# MERGED #
#--------#
biospecimen.tpnt.rna.merged.cl.barcodes <- tcga_replicateFilter(
  biospecimen.tpnt.rna.merged.cl$bcr_aliquot_barcode,
  analyte_target = "RNA",
  filter_FFPE = TRUE,
  full_barcode = TRUE
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# rows: 27230 -> 11368
biospecimen.tpnt.rna.merged.filt <- biospecimen.tpnt.rna.merged.cl[
  biospecimen.tpnt.rna.merged.cl$bcr_aliquot_barcode %in%
    biospecimen.tpnt.rna.merged.cl.barcodes,
  ,
  drop = FALSE
]

###################
### METASTATIC
#--------#
# LEGACY #
#--------#
biospecimen.legacy.tmnt.rna.barcodes <- tcga_replicateFilter(
  biospecimen.legacy.tmnt.rna.cl$bcr_aliquot_barcode,
  analyte_target = "RNA",
  filter_FFPE = TRUE,
  full_barcode = TRUE
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# rows: 2273 -> 1166
biospecimen.legacy.tmnt.rna.filt <- biospecimen.legacy.tmnt.rna.cl[
  biospecimen.legacy.tmnt.rna.cl$bcr_aliquot_barcode %in%
    biospecimen.legacy.tmnt.rna.barcodes,
  ,
  drop = FALSE
]

#------------#
# NON LEGACY #
#------------#
biospecimen.nonlegacy.tmnt.rna.barcodes <- tcga_replicateFilter(
  biospecimen.nonlegacy.tmnt.rna.cl$bcr_aliquot_barcode,
  analyte_target = "RNA",
  filter_FFPE = TRUE,
  full_barcode = TRUE
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# rows: 2370 -> 1215
biospecimen.nonlegacy.tmnt.rna.filt <- biospecimen.nonlegacy.tmnt.rna.cl[
  biospecimen.nonlegacy.tmnt.rna.cl$bcr_aliquot_barcode %in%
    biospecimen.nonlegacy.tmnt.rna.barcodes,
  ,
  drop = FALSE
]

#--------#
# MERGED #
#--------#
biospecimen.tmnt.rna.merged.cl.barcodes <- tcga_replicateFilter(
  biospecimen.tmnt.rna.merged.cl$bcr_aliquot_barcode,
  analyte_target = "RNA",
  filter_FFPE = TRUE,
  full_barcode = TRUE
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# rows: 2372 -> 1216
biospecimen.tmnt.rna.merged.filt <- biospecimen.tmnt.rna.merged.cl[
  biospecimen.tmnt.rna.merged.cl$bcr_aliquot_barcode %in%
    biospecimen.tmnt.rna.merged.cl.barcodes,
  ,
  drop = FALSE
]
##~~~~~~~~##
## TABLES ##
##~~~~~~~~##
#--------#
# LEGACY #
#--------#
# Number of patients per project
biospecimen.legacy.tpnt.rna.filt.patient.table <- biospecimen.legacy.tpnt.rna.filt %>% select(project,bcr_patient_barcode) %>% group_by(project) %>% distinct() %>% summarize(n())
# Number of aliquots per project
biospecimen.legacy.tpnt.rna.filt.aliq.table <- biospecimen.legacy.tpnt.rna.filt %>% select(project,bcr_aliquot_barcode) %>% group_by(project) %>% distinct() %>% summarise(n())
#biospecimen.legacy.tpnt.rna.aliq.table <-biospecimen.legacy.tpnt.rna %>% group_by(project) %>% summarize(no_rows=length(project))
# Table of number of replicates per patients per project
#biospecimen.legacy.tpnt.rna.aliqPatient.table <-biospecimen.legacy.tpnt.rna %>% group_by(project,bcr_patient_barcode) %>% summarize(no_rows=length(project))
biospecimen.legacy.tpnt.rna.filt.aliqPatient.table <-biospecimen.legacy.tpnt.rna.filt %>% select(project,bcr_patient_barcode, bcr_aliquot_barcode) %>% group_by(project,bcr_patient_barcode) %>% distinct() %>% summarize(n())
# # Table of number of replicate aliquots
# biospecimen.legacy.tpnt.rna %>% group_by(project,bcr_aliquot_barcode) %>% summarize(no_rows=length(project))
#------------#
# NON LEGACY #
#------------#
# Number of patients per project
biospecimen.nonlegacy.tpnt.rna.filt.patient.table <-biospecimen.nonlegacy.tpnt.rna.filt %>% select(project,bcr_patient_barcode) %>% group_by(project) %>% distinct() %>% summarize(n())
# Number of aliquots per project
biospecimen.nonlegacy.tpnt.rna.filt.aliq.table <- biospecimen.nonlegacy.tpnt.rna.filt %>% select(project,bcr_aliquot_barcode) %>% group_by(project) %>% distinct() %>% summarise(n())
#biospecimen.nonlegacy.tpnt.rna.aliq.table <-biospecimen.nonlegacy.tpnt.rna %>% group_by(project) %>% summarize(no_rows=length(project))
# Table of number of replicates per patients per project
#biospecimen.nonlegacy.tpnt.rna.aliqPatient.table <-biospecimen.nonlegacy.tpnt.rna %>% group_by(project,bcr_patient_barcode) %>% summarize(no_rows=length(project))
biospecimen.nonlegacy.tpnt.rna.filt.aliqPatient.table <-biospecimen.nonlegacy.tpnt.rna.filt %>% select(project,bcr_patient_barcode, bcr_aliquot_barcode) %>% group_by(project,bcr_patient_barcode) %>% distinct() %>% summarize(n())
# # Table of number of replicate aliquots
# biospecimen.nonlegacy.tpnt.rna %>% group_by(project,bcr_aliquot_barcode) %>% summarize(no_rows=length(project))
#--------#
# MERGED #
#=-------#
# Number of patients per project
biospecimen.tpnt.rna.merged.filt.patient.table <-biospecimen.tpnt.rna.merged.filt %>% select(project,bcr_patient_barcode) %>% group_by(project) %>% distinct() %>% summarize(n())
# Number of aliquots per project
biospecimen.tpnt.rna.merged.filt.aliq.table <- biospecimen.tpnt.rna.merged.filt %>% select(project,bcr_aliquot_barcode) %>% group_by(project) %>% distinct() %>% summarise(n())
#biospecimen.nonlegacy.tpnt.rna.aliq.table <-biospecimen.nonlegacy.tpnt.rna %>% group_by(project) %>% summarize(no_rows=length(project))
# Table of number of replicates per patients per project
#biospecimen.nonlegacy.tpnt.rna.aliqPatient.table <-biospecimen.nonlegacy.tpnt.rna %>% group_by(project,bcr_patient_barcode) %>% summarize(no_rows=length(project))
biospecimen.tpnt.rna.merged.filt.aliqPatient.table <-biospecimen.tpnt.rna.merged.filt %>% select(project,bcr_patient_barcode, bcr_aliquot_barcode) %>% group_by(project,bcr_patient_barcode) %>% distinct() %>% summarize(n())
# # Table of number of replicate aliquots
# biospecimen.nonlegacy.tpnt.rna %>% group_by(project,bcr_aliquot_barcode) %>% summarize(no_rows=length(project))
dim(biospecimen.legacy.tpnt.rna.filt)
## [1] 10307 10
dim(biospecimen.nonlegacy.tpnt.rna.filt)
## [1] 11368 10
dim(biospecimen.tpnt.rna.merged.filt)
## [1] 11368 10
########### METASTATIC
#--------#
# LEGACY #
#--------#
# Number of patients per project
biospecimen.legacy.tmnt.rna.filt.patient.table <- biospecimen.legacy.tmnt.rna.filt %>% select(project,bcr_patient_barcode) %>% group_by(project) %>% distinct() %>% summarize(n())
# Number of aliquots per project
biospecimen.legacy.tmnt.rna.filt.aliq.table <- biospecimen.legacy.tmnt.rna.filt %>% select(project,bcr_aliquot_barcode) %>% group_by(project) %>% distinct() %>% summarise(n())
#biospecimen.legacy.tmnt.rna.aliq.table <-biospecimen.legacy.tmnt.rna %>% group_by(project) %>% summarize(no_rows=length(project))
# Table of number of replicates per patients per project
#biospecimen.legacy.tmnt.rna.aliqPatient.table <-biospecimen.legacy.tmnt.rna %>% group_by(project,bcr_patient_barcode) %>% summarize(no_rows=length(project))
biospecimen.legacy.tmnt.rna.filt.aliqPatient.table <-biospecimen.legacy.tmnt.rna.filt %>% select(project,bcr_patient_barcode, bcr_aliquot_barcode) %>% group_by(project,bcr_patient_barcode) %>% distinct() %>% summarize(n())
# # Table of number of replicate aliquots
# biospecimen.legacy.tmnt.rna %>% group_by(project,bcr_aliquot_barcode) %>% summarize(no_rows=length(project))
#------------#
# NON LEGACY #
#------------#
# Number of patients per project
biospecimen.nonlegacy.tmnt.rna.filt.patient.table <-biospecimen.nonlegacy.tmnt.rna.filt %>% select(project,bcr_patient_barcode) %>% group_by(project) %>% distinct() %>% summarize(n())
# Number of aliquots per project
biospecimen.nonlegacy.tmnt.rna.filt.aliq.table <- biospecimen.nonlegacy.tmnt.rna.filt %>% select(project,bcr_aliquot_barcode) %>% group_by(project) %>% distinct() %>% summarise(n())
#biospecimen.nonlegacy.tmnt.rna.aliq.table <-biospecimen.nonlegacy.tmnt.rna %>% group_by(project) %>% summarize(no_rows=length(project))
# Table of number of replicates per patients per project
#biospecimen.nonlegacy.tmnt.rna.aliqPatient.table <-biospecimen.nonlegacy.tmnt.rna %>% group_by(project,bcr_patient_barcode) %>% summarize(no_rows=length(project))
biospecimen.nonlegacy.tmnt.rna.filt.aliqPatient.table <-biospecimen.nonlegacy.tmnt.rna.filt %>% select(project,bcr_patient_barcode, bcr_aliquot_barcode) %>% group_by(project,bcr_patient_barcode) %>% distinct() %>% summarize(n())
# # Table of number of replicate aliquots
# biospecimen.nonlegacy.tmnt.rna %>% group_by(project,bcr_aliquot_barcode) %>% summarize(no_rows=length(project))
#-----------------------#
# MERGED (filtered set) #
#-----------------------#
# Patients per project: distinct patient barcodes counted within each project
biospecimen.tmnt.rna.merged.filt.patient.table <- biospecimen.tmnt.rna.merged.filt %>%
  distinct(project, bcr_patient_barcode) %>%
  group_by(project) %>%
  summarise(n())
# Aliquots per project: distinct aliquot barcodes counted within each project
biospecimen.tmnt.rna.merged.filt.aliq.table <- biospecimen.tmnt.rna.merged.filt %>%
  distinct(project, bcr_aliquot_barcode) %>%
  group_by(project) %>%
  summarise(n())
# Aliquots per patient per project: distinct aliquot barcodes counted for each (project, patient) pair
biospecimen.tmnt.rna.merged.filt.aliqPatient.table <- biospecimen.tmnt.rna.merged.filt %>%
  distinct(project, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(project, bcr_patient_barcode) %>%
  summarise(n())
# Disabled row-count variants (they refer to the unfiltered non-legacy table), kept for reference:
#biospecimen.nonlegacy.tmnt.rna.aliq.table <-biospecimen.nonlegacy.tmnt.rna %>% group_by(project) %>% summarize(no_rows=length(project))
#biospecimen.nonlegacy.tmnt.rna.aliqPatient.table <-biospecimen.nonlegacy.tmnt.rna %>% group_by(project,bcr_patient_barcode) %>% summarize(no_rows=length(project))
# # Table of number of replicate aliquots
# biospecimen.nonlegacy.tmnt.rna %>% group_by(project,bcr_aliquot_barcode) %>% summarize(no_rows=length(project))
# Print the dimensions (rows, columns) of the three filtered tmnt tables:
# legacy, non-legacy and merged. The "## [1] ..." lines below each call are the
# values recorded when the report was rendered.
dim(biospecimen.legacy.tmnt.rna.filt)
## [1] 1166 10
dim(biospecimen.nonlegacy.tmnt.rna.filt)
## [1] 1215 10
dim(biospecimen.tmnt.rna.merged.filt)
## [1] 1216 10
# Interactive tables (with copy/excel/csv export buttons) for the filtered legacy tpnt summaries.
# 1) Patients per project
datatable(
  data = biospecimen.legacy.tpnt.rna.filt.patient.table,
  caption = "Number of patients per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# 2) Aliquots per project
datatable(
  data = biospecimen.legacy.tpnt.rna.filt.aliq.table,
  caption = "Number of aliquots per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# 3) Aliquots per patient per project
datatable(
  data = biospecimen.legacy.tpnt.rna.filt.aliqPatient.table,
  caption = "Number of aliquots per patient per project (legacy)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# Interactive tables (with copy/excel/csv export buttons) for the filtered non-legacy (harmonized) tpnt summaries.
# 1) Patients per project
datatable(
  data = biospecimen.nonlegacy.tpnt.rna.filt.patient.table,
  caption = "Number of patients per project (harmonized)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# 2) Aliquots per project
datatable(
  data = biospecimen.nonlegacy.tpnt.rna.filt.aliq.table,
  caption = "Number of aliquots per project (harmonized)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# 3) Aliquots per patient per project
datatable(
  data = biospecimen.nonlegacy.tpnt.rna.filt.aliqPatient.table,
  caption = "Number of aliquots per patient per project (harmonized)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
## Warning in instance$preRenderHook(instance): It seems your data is too big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Interactive tables (with copy/excel/csv export buttons) for the filtered merged tpnt summaries.
# 1) Patients per project
datatable(
  data = biospecimen.tpnt.rna.merged.filt.patient.table,
  caption = "Number of patients per project (merged)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# 2) Aliquots per project
datatable(
  data = biospecimen.tpnt.rna.merged.filt.aliq.table,
  caption = "Number of aliquots per project (merged)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
# 3) Aliquots per patient per project
datatable(
  data = biospecimen.tpnt.rna.merged.filt.aliqPatient.table,
  caption = "Number of aliquots per patient per project (merged)",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  )
)
## Warning in instance$preRenderHook(instance): It seems your data is too big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Pre- vs post-filtering comparison tables (legacy tpnt).
# Each table joins the summary of the unfiltered data ("pre") with the matching
# summary of the filtered (.filt) data ("post"). merge() keeps only the keys
# that are present in both inputs.

# Patients per project
biospecimen.legacy.tpnt.rna.patient.table.comb <- merge(
  biospecimen.legacy.tpnt.rna.patient.table,
  biospecimen.legacy.tpnt.rna.filt.patient.table,
  by = "project"
)
colnames(biospecimen.legacy.tpnt.rna.patient.table.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.legacy.tpnt.rna.patient.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of patients per project (legacy)"
)

# Aliquots per project.
# The "post" column must come from the filtered ALIQUOT table; it was previously
# taken from the filtered patient table, so it showed patient counts.
biospecimen.legacy.tpnt.rna.aliq.table.comb <- merge(
  biospecimen.legacy.tpnt.rna.aliq.table,
  biospecimen.legacy.tpnt.rna.filt.aliq.table,
  by = "project"
)
colnames(biospecimen.legacy.tpnt.rna.aliq.table.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.legacy.tpnt.rna.aliq.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per project (legacy)"
)

# Aliquots per patient per project
biospecimen.legacy.tpnt.rna.aliqPatient.table.comb <- merge(
  biospecimen.legacy.tpnt.rna.aliqPatient.table,
  biospecimen.legacy.tpnt.rna.filt.aliqPatient.table,
  by = c("project", "bcr_patient_barcode")
)
colnames(biospecimen.legacy.tpnt.rna.aliqPatient.table.comb) <- c("project", "patientID", "pre", "post")
datatable(
  biospecimen.legacy.tpnt.rna.aliqPatient.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per patient per project (legacy)"
)
# Pre- vs post-filtering comparison tables (non-legacy tpnt).
# Each table joins the summary of the unfiltered data ("pre") with the matching
# summary of the filtered (.filt) data ("post"). merge() keeps only the keys
# that are present in both inputs.

# Patients per project
biospecimen.nonlegacy.tpnt.rna.patient.table.comb <- merge(
  biospecimen.nonlegacy.tpnt.rna.patient.table,
  biospecimen.nonlegacy.tpnt.rna.filt.patient.table,
  by = "project"
)
colnames(biospecimen.nonlegacy.tpnt.rna.patient.table.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.nonlegacy.tpnt.rna.patient.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of patients per project (nonlegacy)"
)

# Aliquots per project.
# The "post" column must come from the filtered ALIQUOT table; it was previously
# taken from the filtered patient table, so it showed patient counts.
biospecimen.nonlegacy.tpnt.rna.aliq.table.comb <- merge(
  biospecimen.nonlegacy.tpnt.rna.aliq.table,
  biospecimen.nonlegacy.tpnt.rna.filt.aliq.table,
  by = "project"
)
colnames(biospecimen.nonlegacy.tpnt.rna.aliq.table.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.nonlegacy.tpnt.rna.aliq.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per project (nonlegacy)"
)

# Aliquots per patient per project
biospecimen.nonlegacy.tpnt.rna.aliqPatient.table.comb <- merge(
  biospecimen.nonlegacy.tpnt.rna.aliqPatient.table,
  biospecimen.nonlegacy.tpnt.rna.filt.aliqPatient.table,
  by = c("project", "bcr_patient_barcode")
)
colnames(biospecimen.nonlegacy.tpnt.rna.aliqPatient.table.comb) <- c("project", "patientID", "pre", "post")
datatable(
  biospecimen.nonlegacy.tpnt.rna.aliqPatient.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per patient per project (nonlegacy)"
)
## Warning in instance$preRenderHook(instance): It seems your data is too big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Pre- vs post-filtering comparison tables (merged tpnt).
# Each table joins the summary of the unfiltered data ("pre") with the matching
# summary of the filtered (.filt) data ("post"). merge() keeps only the keys
# that are present in both inputs.
# Captions are labelled "(merged)"; they previously read "(nonlegacy)".

# Patients per project
biospecimen.tpnt.rna.merged.patient.table.comb <- merge(
  biospecimen.tpnt.rna.merged.patient.table,
  biospecimen.tpnt.rna.merged.filt.patient.table,
  by = "project"
)
colnames(biospecimen.tpnt.rna.merged.patient.table.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.tpnt.rna.merged.patient.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of patients per project (merged)"
)

# Aliquots per project.
# The "post" column must come from the filtered ALIQUOT table; it was previously
# taken from the filtered patient table, so it showed patient counts.
biospecimen.tpnt.rna.merged.aliq.table.comb <- merge(
  biospecimen.tpnt.rna.merged.aliq.table,
  biospecimen.tpnt.rna.merged.filt.aliq.table,
  by = "project"
)
colnames(biospecimen.tpnt.rna.merged.aliq.table.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.tpnt.rna.merged.aliq.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per project (merged)"
)

# Aliquots per patient per project
biospecimen.tpnt.rna.merged.aliqPatient.table.comb <- merge(
  biospecimen.tpnt.rna.merged.aliqPatient.table,
  biospecimen.tpnt.rna.merged.filt.aliqPatient.table,
  by = c("project", "bcr_patient_barcode")
)
colnames(biospecimen.tpnt.rna.merged.aliqPatient.table.comb) <- c("project", "patientID", "pre", "post")
datatable(
  biospecimen.tpnt.rna.merged.aliqPatient.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per patient per project (merged)"
)
## Warning in instance$preRenderHook(instance): It seems your data is too big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
Summarizing the number of tumor and normal aliquots for all TCGA projects, pre- and post-filtering.
# Row counts per project and sample type (legacy tpnt), before filtering
# (.cl table -> "pre") and after filtering (.filt table -> "post").
biospecimen.legacy.tpnt.rna.cl.group <- biospecimen.legacy.tpnt.rna.cl %>%
  group_by(project, sample_type) %>%
  summarize(no_rows = length(project))
biospecimen.legacy.tpnt.rna.filt.group <- biospecimen.legacy.tpnt.rna.filt %>%
  group_by(project, sample_type) %>%
  summarize(no_rows = length(project))
# merge() keeps only the (project, sample_type) pairs present in both tables
biospecimen.tpnt.rna.legacy.group.table.comb <- merge(
  biospecimen.legacy.tpnt.rna.cl.group,
  biospecimen.legacy.tpnt.rna.filt.group,
  by = c("project", "sample_type")
)
colnames(biospecimen.tpnt.rna.legacy.group.table.comb) <- c("project", "sample_type", "pre", "post")
# Caption describes what is counted (aliquot rows per sample type), not patients
datatable(
  biospecimen.tpnt.rna.legacy.group.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per sample type per project (legacy)"
)
# Row counts per project and sample type (non-legacy tpnt), before filtering
# (.cl table -> "pre") and after filtering (.filt table -> "post").
biospecimen.nonlegacy.tpnt.rna.cl.group <- biospecimen.nonlegacy.tpnt.rna.cl %>%
  group_by(project, sample_type) %>%
  summarize(no_rows = length(project))
biospecimen.nonlegacy.tpnt.rna.filt.group <- biospecimen.nonlegacy.tpnt.rna.filt %>%
  group_by(project, sample_type) %>%
  summarize(no_rows = length(project))
# merge() keeps only the (project, sample_type) pairs present in both tables
biospecimen.tpnt.rna.nonlegacy.group.table.comb <- merge(
  biospecimen.nonlegacy.tpnt.rna.cl.group,
  biospecimen.nonlegacy.tpnt.rna.filt.group,
  by = c("project", "sample_type")
)
colnames(biospecimen.tpnt.rna.nonlegacy.group.table.comb) <- c("project", "sample_type", "pre", "post")
# Caption names the right data source and what is counted
# (it previously read "Number of patients per project (legacy)")
datatable(
  biospecimen.tpnt.rna.nonlegacy.group.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per sample type per project (nonlegacy)"
)
# Row counts per project and sample type (merged tpnt), before filtering
# (.cl table -> "pre") and after filtering (.filt table -> "post").
biospecimen.tpnt.rna.merged.cl.group <- biospecimen.tpnt.rna.merged.cl %>%
  group_by(project, sample_type) %>%
  summarize(no_rows = length(project))
biospecimen.tpnt.rna.merged.filt.group <- biospecimen.tpnt.rna.merged.filt %>%
  group_by(project, sample_type) %>%
  summarize(no_rows = length(project))
# merge() keeps only the (project, sample_type) pairs present in both tables
biospecimen.tpnt.rna.merged.group.table.comb <- merge(
  biospecimen.tpnt.rna.merged.cl.group,
  biospecimen.tpnt.rna.merged.filt.group,
  by = c("project", "sample_type")
)
colnames(biospecimen.tpnt.rna.merged.group.table.comb) <- c("project", "sample_type", "pre", "post")
# Caption names the right data source and what is counted
# (it previously read "Number of patients per project (legacy)")
datatable(
  biospecimen.tpnt.rna.merged.group.table.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per sample type per project (merged)"
)
First, I add a ‘type’ column holding the TCGA project code (the project name without the ‘TCGA-’ prefix).
Then I add the BRCA molecular subtypes and the COAD MSI and MSS subtypes to the tables, i.e. the original BRCA and COAD projects are split into these subsets.
#################################################
# BRCA molecular subtypes: settings and lookup  #
#################################################
# Settings for the BRCA query.
# NOTE(review): these five variables are not referenced again in the visible part
# of this script - confirm whether they are still needed.
cancer <- "TCGA-BRCA"
PlatformCancer <- "IlluminaHiSeq_RNASeqV2"
dataType <- "rsem.genes.results"
pathCancer <- "TCGAData/miRNA"
data.category <- "Transcriptome Profiling"
# Subtype table for all cancer types
molecular.subtypes <- PanCancerAtlas_subtypes()
# BRCA-only rows of the subtype table
molecular.subtypes.brca <- subset(molecular.subtypes, cancer.type == "BRCA")
# Patient ID = first 12 characters of the sample ID, added to both tables
molecular.subtypes.brca[["PatientID"]] <- substr(molecular.subtypes.brca$pan.samplesID, start = 1, stop = 12)
molecular.subtypes[["PatientID"]] <- substr(molecular.subtypes$pan.samplesID, start = 1, stop = 12)
############# LEGACY, tpnt (pre-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.legacy.tpnt.rna.cl[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.legacy.tpnt.rna.cl$project
)
# Attach the BRCA subtypes first, then the COAD subtypes
biospecimen.legacy.tpnt.rna.cl.f <- add_brcaSubtypes(biospecimen.legacy.tpnt.rna.cl, molecular.subtypes)
biospecimen.legacy.tpnt.rna.cl.f <- add_coadSubtypes(biospecimen.legacy.tpnt.rna.cl.f)
dim(biospecimen.legacy.tpnt.rna.cl.f)
## [1] 27208 11
# Patients per type: distinct patient barcodes counted within each type
biospecimen.legacy.tpnt.rna.patient.sub.table <- biospecimen.legacy.tpnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type: distinct aliquot barcodes counted within each type
biospecimen.legacy.tpnt.rna.aliq.sub.table <- biospecimen.legacy.tpnt.rna.cl.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type: distinct aliquot barcodes counted for each (type, patient) pair
biospecimen.legacy.tpnt.rna.aliqPatient.sub.table <- biospecimen.legacy.tpnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############# LEGACY, tmnt / METASTATIC (pre-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.legacy.tmnt.rna.cl[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.legacy.tmnt.rna.cl$project
)
# Attach the BRCA subtypes first, then the COAD subtypes
biospecimen.legacy.tmnt.rna.cl.f <- add_brcaSubtypes(biospecimen.legacy.tmnt.rna.cl, molecular.subtypes)
biospecimen.legacy.tmnt.rna.cl.f <- add_coadSubtypes(biospecimen.legacy.tmnt.rna.cl.f)
dim(biospecimen.legacy.tmnt.rna.cl.f)
## [1] 2829 11
# Patients per type
biospecimen.legacy.tmnt.rna.patient.sub.table <- biospecimen.legacy.tmnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type
biospecimen.legacy.tmnt.rna.aliq.sub.table <- biospecimen.legacy.tmnt.rna.cl.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type
biospecimen.legacy.tmnt.rna.aliqPatient.sub.table <- biospecimen.legacy.tmnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############ NON LEGACY, tpnt (pre-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.nonlegacy.tpnt.rna.cl[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.nonlegacy.tpnt.rna.cl$project
)
# Attach the BRCA subtypes first, then the COAD subtypes
biospecimen.nonlegacy.tpnt.rna.cl.f <- add_brcaSubtypes(biospecimen.nonlegacy.tpnt.rna.cl, molecular.subtypes)
biospecimen.nonlegacy.tpnt.rna.cl.f <- add_coadSubtypes(biospecimen.nonlegacy.tpnt.rna.cl.f)
dim(biospecimen.nonlegacy.tpnt.rna.cl.f)
## [1] 31213 11
# Patients per type: distinct patient barcodes counted within each type
biospecimen.nonlegacy.tpnt.rna.patient.sub.table <- biospecimen.nonlegacy.tpnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type: distinct aliquot barcodes counted within each type
biospecimen.nonlegacy.tpnt.rna.aliq.sub.table <- biospecimen.nonlegacy.tpnt.rna.cl.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type: distinct aliquot barcodes counted for each (type, patient) pair
biospecimen.nonlegacy.tpnt.rna.aliqPatient.sub.table <- biospecimen.nonlegacy.tpnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############ NON LEGACY, tmnt / METASTATIC (pre-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.nonlegacy.tmnt.rna.cl[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.nonlegacy.tmnt.rna.cl$project
)
# Attach the BRCA subtypes first, then the COAD subtypes
biospecimen.nonlegacy.tmnt.rna.cl.f <- add_brcaSubtypes(biospecimen.nonlegacy.tmnt.rna.cl, molecular.subtypes)
biospecimen.nonlegacy.tmnt.rna.cl.f <- add_coadSubtypes(biospecimen.nonlegacy.tmnt.rna.cl.f)
dim(biospecimen.nonlegacy.tmnt.rna.cl.f)
## [1] 2926 11
# Patients per type
biospecimen.nonlegacy.tmnt.rna.patient.sub.table <- biospecimen.nonlegacy.tmnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type
biospecimen.nonlegacy.tmnt.rna.aliq.sub.table <- biospecimen.nonlegacy.tmnt.rna.cl.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type
biospecimen.nonlegacy.tmnt.rna.aliqPatient.sub.table <- biospecimen.nonlegacy.tmnt.rna.cl.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############ MERGED, tpnt (pre-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.tpnt.rna.merged.cl[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.tpnt.rna.merged.cl$project
)
# Attach the BRCA subtypes first, then the COAD subtypes
biospecimen.tpnt.rna.merged.cl.f <- add_brcaSubtypes(biospecimen.tpnt.rna.merged.cl, molecular.subtypes)
biospecimen.tpnt.rna.merged.cl.f <- add_coadSubtypes(biospecimen.tpnt.rna.merged.cl.f)
dim(biospecimen.tpnt.rna.merged.cl.f)
## [1] 31213 11
# Store the subtype-annotated merged table in the intermediate TCGA output folder
save(
  biospecimen.tpnt.rna.merged.cl.f,
  file = paste0(tcgaIntermediateData, "biospecimen.merged.tpnt.RNA.noDup.sub.RData")
)
# load(paste0("C:/Users/aimilia/BIOINF/1_DATA/3_TCGA/2_Clinical_Meta/New.v2/","biospecimen.merged.tpnt.RNA.noDup.sub.RData"))
# Patients per type: distinct patient barcodes counted within each type
biospecimen.tpnt.rna.merged.patient.sub.table <- biospecimen.tpnt.rna.merged.cl.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type: distinct aliquot barcodes counted within each type
biospecimen.tpnt.rna.merged.aliq.sub.table <- biospecimen.tpnt.rna.merged.cl.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type: distinct aliquot barcodes counted for each (type, patient) pair
biospecimen.tpnt.rna.merged.aliqPatient.sub.table <- biospecimen.tpnt.rna.merged.cl.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############ MERGED, tmnt / METASTATIC (pre-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.tmnt.rna.merged.cl[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.tmnt.rna.merged.cl$project
)
# Attach the BRCA subtypes first, then the COAD subtypes
biospecimen.tmnt.rna.merged.cl.f <- add_brcaSubtypes(biospecimen.tmnt.rna.merged.cl, molecular.subtypes)
biospecimen.tmnt.rna.merged.cl.f <- add_coadSubtypes(biospecimen.tmnt.rna.merged.cl.f)
dim(biospecimen.tmnt.rna.merged.cl.f)
## [1] 2928 11
# Patients per type
biospecimen.tmnt.rna.merged.patient.sub.table <- biospecimen.tmnt.rna.merged.cl.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type
biospecimen.tmnt.rna.merged.aliq.sub.table <- biospecimen.tmnt.rna.merged.cl.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type
biospecimen.tmnt.rna.merged.aliqPatient.sub.table <- biospecimen.tmnt.rna.merged.cl.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
To get the post-filtering data, I extract the subtypes from the non-filtered data, filter them, and then merge them with the filtered data.
############## LEGACY, tpnt (post-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.legacy.tpnt.rna.filt[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.legacy.tpnt.rna.filt$project
)
# BRCA subtypes: the helper receives the unfiltered table, the filtered table and the subtype lookup
biospecimen.legacy.tpnt.rna.filt.f <- add_brcaSubtypesFilt(
  biospecimen.legacy.tpnt.rna.cl,
  biospecimen.legacy.tpnt.rna.filt,
  molecular.subtypes
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# COAD subtypes: the helper receives the unfiltered table and the BRCA-annotated filtered table
biospecimen.legacy.tpnt.rna.filt.f <- add_coadSubtypesFilt(
  biospecimen.legacy.tpnt.rna.cl,
  biospecimen.legacy.tpnt.rna.filt.f
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
dim(biospecimen.legacy.tpnt.rna.filt.f)
## [1] 12225 11
# Patients per type: distinct patient barcodes counted within each type
biospecimen.legacy.tpnt.rna.filt.patient.sub.table <- biospecimen.legacy.tpnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type: distinct aliquot barcodes counted within each type
biospecimen.legacy.tpnt.rna.filt.aliq.sub.table <- biospecimen.legacy.tpnt.rna.filt.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type: distinct aliquot barcodes counted for each (type, patient) pair
biospecimen.legacy.tpnt.rna.filt.aliqPatient.sub.table <- biospecimen.legacy.tpnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############## LEGACY, tmnt / METASTATIC (post-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.legacy.tmnt.rna.filt[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.legacy.tmnt.rna.filt$project
)
# BRCA subtypes
biospecimen.legacy.tmnt.rna.filt.f <- add_brcaSubtypesFilt(
  biospecimen.legacy.tmnt.rna.cl,
  biospecimen.legacy.tmnt.rna.filt,
  molecular.subtypes
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# COAD subtypes
biospecimen.legacy.tmnt.rna.filt.f <- add_coadSubtypesFilt(
  biospecimen.legacy.tmnt.rna.cl,
  biospecimen.legacy.tmnt.rna.filt.f
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
dim(biospecimen.legacy.tmnt.rna.filt.f)
## [1] 1451 11
# Patients per type
biospecimen.legacy.tmnt.rna.filt.patient.sub.table <- biospecimen.legacy.tmnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type
biospecimen.legacy.tmnt.rna.filt.aliq.sub.table <- biospecimen.legacy.tmnt.rna.filt.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type
biospecimen.legacy.tmnt.rna.filt.aliqPatient.sub.table <- biospecimen.legacy.tmnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
########### NON LEGACY, tpnt (post-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.nonlegacy.tpnt.rna.filt[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.nonlegacy.tpnt.rna.filt$project
)
# BRCA subtypes: the helper receives the unfiltered table, the filtered table and the subtype lookup
biospecimen.nonlegacy.tpnt.rna.filt.f <- add_brcaSubtypesFilt(
  biospecimen.nonlegacy.tpnt.rna.cl,
  biospecimen.nonlegacy.tpnt.rna.filt,
  molecular.subtypes
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# COAD subtypes: the helper receives the unfiltered table and the BRCA-annotated filtered table
biospecimen.nonlegacy.tpnt.rna.filt.f <- add_coadSubtypesFilt(
  biospecimen.nonlegacy.tpnt.rna.cl,
  biospecimen.nonlegacy.tpnt.rna.filt.f
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
dim(biospecimen.nonlegacy.tpnt.rna.filt.f)
## [1] 13286 11
# Patients per type: distinct patient barcodes counted within each type
biospecimen.nonlegacy.tpnt.rna.filt.patient.sub.table <- biospecimen.nonlegacy.tpnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type: distinct aliquot barcodes counted within each type
biospecimen.nonlegacy.tpnt.rna.filt.aliq.sub.table <- biospecimen.nonlegacy.tpnt.rna.filt.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type: distinct aliquot barcodes counted for each (type, patient) pair
biospecimen.nonlegacy.tpnt.rna.filt.aliqPatient.sub.table <- biospecimen.nonlegacy.tpnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
########### NON LEGACY, tmnt / METASTATIC (post-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.nonlegacy.tmnt.rna.filt[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.nonlegacy.tmnt.rna.filt$project
)
# BRCA subtypes
biospecimen.nonlegacy.tmnt.rna.filt.f <- add_brcaSubtypesFilt(
  biospecimen.nonlegacy.tmnt.rna.cl,
  biospecimen.nonlegacy.tmnt.rna.filt,
  molecular.subtypes
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# COAD subtypes
biospecimen.nonlegacy.tmnt.rna.filt.f <- add_coadSubtypesFilt(
  biospecimen.nonlegacy.tmnt.rna.cl,
  biospecimen.nonlegacy.tmnt.rna.filt.f
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
dim(biospecimen.nonlegacy.tmnt.rna.filt.f)
## [1] 1500 11
# Patients per type
biospecimen.nonlegacy.tmnt.rna.filt.patient.sub.table <- biospecimen.nonlegacy.tmnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type
biospecimen.nonlegacy.tmnt.rna.filt.aliq.sub.table <- biospecimen.nonlegacy.tmnt.rna.filt.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type
biospecimen.nonlegacy.tmnt.rna.filt.aliqPatient.sub.table <- biospecimen.nonlegacy.tmnt.rna.filt.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############### MERGED, tpnt (post-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.tpnt.rna.merged.filt[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.tpnt.rna.merged.filt$project
)
# BRCA subtypes: the helper receives the unfiltered table, the filtered table and the subtype lookup
biospecimen.tpnt.rna.merged.filt.f <- add_brcaSubtypesFilt(
  biospecimen.tpnt.rna.merged.cl,
  biospecimen.tpnt.rna.merged.filt,
  molecular.subtypes
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# COAD subtypes: the helper receives the unfiltered table and the BRCA-annotated filtered table
biospecimen.tpnt.rna.merged.filt.f <- add_coadSubtypesFilt(
  biospecimen.tpnt.rna.merged.cl,
  biospecimen.tpnt.rna.merged.filt.f
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
dim(biospecimen.tpnt.rna.merged.filt.f)
## [1] 13286 11
# Patients per type: distinct patient barcodes counted within each type
biospecimen.tpnt.rna.merged.filt.patient.sub.table <- biospecimen.tpnt.rna.merged.filt.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type: distinct aliquot barcodes counted within each type
biospecimen.tpnt.rna.merged.filt.aliq.sub.table <- biospecimen.tpnt.rna.merged.filt.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type: distinct aliquot barcodes counted for each (type, patient) pair
biospecimen.tpnt.rna.merged.filt.aliqPatient.sub.table <- biospecimen.tpnt.rna.merged.filt.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
############### MERGED, tmnt / METASTATIC (post-filtering)
# 'type' = project name with the "TCGA-" prefix stripped
biospecimen.tmnt.rna.merged.filt[["type"]] <- gsub(
  pattern = "TCGA-", replacement = "", x = biospecimen.tmnt.rna.merged.filt$project
)
# BRCA subtypes
biospecimen.tmnt.rna.merged.filt.f <- add_brcaSubtypesFilt(
  biospecimen.tmnt.rna.merged.cl,
  biospecimen.tmnt.rna.merged.filt,
  molecular.subtypes
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
# COAD subtypes
biospecimen.tmnt.rna.merged.filt.f <- add_coadSubtypesFilt(
  biospecimen.tmnt.rna.merged.cl,
  biospecimen.tmnt.rna.merged.filt.f
)
## [1] "RNA"
## [1] "Grabbing RNA..."
## [1] "Filter according to portion number"
## [1] "Filter according to plate number"
## [1] "ooo No more duplicates,filter barcodes successfully!"
dim(biospecimen.tmnt.rna.merged.filt.f)
## [1] 1501 11
# Patients per type
biospecimen.tmnt.rna.merged.filt.patient.sub.table <- biospecimen.tmnt.rna.merged.filt.f %>%
  distinct(type, bcr_patient_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per type
biospecimen.tmnt.rna.merged.filt.aliq.sub.table <- biospecimen.tmnt.rna.merged.filt.f %>%
  distinct(type, bcr_aliquot_barcode) %>%
  group_by(type) %>%
  summarise(n())
# Aliquots per patient per type
biospecimen.tmnt.rna.merged.filt.aliqPatient.sub.table <- biospecimen.tmnt.rna.merged.filt.f %>%
  distinct(type, bcr_patient_barcode, bcr_aliquot_barcode) %>%
  group_by(type, bcr_patient_barcode) %>%
  summarise(n())
# Pre- vs post-filtering comparison tables by type (legacy tpnt).
# 'type' is the project code with BRCA/COAD replaced by their subtypes.
# "pre" comes from the unfiltered summary, "post" from the filtered (.filt) one.
# merge() keeps only the keys that are present in both inputs.

# Patients per type
biospecimen.legacy.tpnt.rna.patient.table.sub.comb <- merge(
  biospecimen.legacy.tpnt.rna.patient.sub.table,
  biospecimen.legacy.tpnt.rna.filt.patient.sub.table,
  by = "type"
)
colnames(biospecimen.legacy.tpnt.rna.patient.table.sub.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.legacy.tpnt.rna.patient.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of patients per project (legacy)"
)

# Aliquots per type.
# The "post" column must come from the filtered ALIQUOT table; it was previously
# taken from the filtered patient table, so it showed patient counts.
biospecimen.legacy.tpnt.rna.aliq.table.sub.comb <- merge(
  biospecimen.legacy.tpnt.rna.aliq.sub.table,
  biospecimen.legacy.tpnt.rna.filt.aliq.sub.table,
  by = "type"
)
colnames(biospecimen.legacy.tpnt.rna.aliq.table.sub.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.legacy.tpnt.rna.aliq.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per project (legacy)"
)

# Aliquots per patient per type
biospecimen.legacy.tpnt.rna.aliqPatient.table.sub.comb <- merge(
  biospecimen.legacy.tpnt.rna.aliqPatient.sub.table,
  biospecimen.legacy.tpnt.rna.filt.aliqPatient.sub.table,
  by = c("type", "bcr_patient_barcode")
)
colnames(biospecimen.legacy.tpnt.rna.aliqPatient.table.sub.comb) <- c("project", "patientID", "pre", "post")
datatable(
  biospecimen.legacy.tpnt.rna.aliqPatient.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per patient per project (legacy)"
)
# Pre- vs post-filtering comparison tables by type (non-legacy tpnt).
# 'type' is the project code with BRCA/COAD replaced by their subtypes.
# "pre" comes from the unfiltered summary, "post" from the filtered (.filt) one.
# merge() keeps only the keys that are present in both inputs.

# Patients per type
biospecimen.nonlegacy.tpnt.rna.patient.table.sub.comb <- merge(
  biospecimen.nonlegacy.tpnt.rna.patient.sub.table,
  biospecimen.nonlegacy.tpnt.rna.filt.patient.sub.table,
  by = "type"
)
colnames(biospecimen.nonlegacy.tpnt.rna.patient.table.sub.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.nonlegacy.tpnt.rna.patient.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of patients per project (nonlegacy)"
)

# Aliquots per type.
# The "post" column must come from the filtered ALIQUOT table; it was previously
# taken from the filtered patient table, so it showed patient counts.
biospecimen.nonlegacy.tpnt.rna.aliq.table.sub.comb <- merge(
  biospecimen.nonlegacy.tpnt.rna.aliq.sub.table,
  biospecimen.nonlegacy.tpnt.rna.filt.aliq.sub.table,
  by = "type"
)
colnames(biospecimen.nonlegacy.tpnt.rna.aliq.table.sub.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.nonlegacy.tpnt.rna.aliq.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per project (nonlegacy)"
)

# Aliquots per patient per type
biospecimen.nonlegacy.tpnt.rna.aliqPatient.table.sub.comb <- merge(
  biospecimen.nonlegacy.tpnt.rna.aliqPatient.sub.table,
  biospecimen.nonlegacy.tpnt.rna.filt.aliqPatient.sub.table,
  by = c("type", "bcr_patient_barcode")
)
colnames(biospecimen.nonlegacy.tpnt.rna.aliqPatient.table.sub.comb) <- c("project", "patientID", "pre", "post")
datatable(
  biospecimen.nonlegacy.tpnt.rna.aliqPatient.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per patient per project (nonlegacy)"
)
# Pre- vs post-filtering comparison tables by type (merged tpnt).
# 'type' is the project code with BRCA/COAD replaced by their subtypes.
# "pre" comes from the unfiltered summary, "post" from the filtered (.filt) one.
# merge() keeps only the keys that are present in both inputs.
# Captions are labelled "(merged)"; they previously read "(nonlegacy)".

# Patients per type
biospecimen.tpnt.rna.merged.patient.table.sub.comb <- merge(
  biospecimen.tpnt.rna.merged.patient.sub.table,
  biospecimen.tpnt.rna.merged.filt.patient.sub.table,
  by = "type"
)
colnames(biospecimen.tpnt.rna.merged.patient.table.sub.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.tpnt.rna.merged.patient.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of patients per project (merged)"
)

# Aliquots per type.
# The "post" column must come from the filtered ALIQUOT table; it was previously
# taken from the filtered patient table, so it showed patient counts.
biospecimen.tpnt.rna.merged.aliq.table.sub.comb <- merge(
  biospecimen.tpnt.rna.merged.aliq.sub.table,
  biospecimen.tpnt.rna.merged.filt.aliq.sub.table,
  by = "type"
)
colnames(biospecimen.tpnt.rna.merged.aliq.table.sub.comb) <- c("project", "pre", "post")
datatable(
  biospecimen.tpnt.rna.merged.aliq.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per project (merged)"
)

# Aliquots per patient per type
biospecimen.tpnt.rna.merged.aliqPatient.table.sub.comb <- merge(
  biospecimen.tpnt.rna.merged.aliqPatient.sub.table,
  biospecimen.tpnt.rna.merged.filt.aliqPatient.sub.table,
  by = c("type", "bcr_patient_barcode")
)
colnames(biospecimen.tpnt.rna.merged.aliqPatient.table.sub.comb) <- c("project", "patientID", "pre", "post")
datatable(
  biospecimen.tpnt.rna.merged.aliqPatient.table.sub.comb,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "excel", "csv"),
    scrollX = TRUE,
    pageLength = 15
  ),
  caption = "Number of aliquots per patient per project (merged)"
)
# Capture the session details once, save their printed form to a text file
# under `sessionInfoPath`, and finally show them in the script output.
session_info <- sessionInfo()
# Create the output folder when it is missing; otherwise writeLines() fails
# because it cannot open the destination file.
if (!dir.exists(sessionInfoPath)) {
  dir.create(sessionInfoPath, recursive = TRUE)
}
writeLines(
  capture.output(session_info),
  paste0(sessionInfoPath, "A_05_get_TCGA_biospecimen_RNA.txt")
)
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C LC_TIME=English_United States.1252
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] extrafont_0.18 DT_0.22 dplyr_1.0.9 DESeq2_1.34.0 SummarizedExperiment_1.24.0
## [6] Biobase_2.54.0 MatrixGenerics_1.6.0 matrixStats_0.62.0 GenomicRanges_1.46.1 GenomeInfoDb_1.30.1
## [11] IRanges_2.28.0 S4Vectors_0.32.4 BiocGenerics_0.40.0 TCGAbiolinks_2.22.4 devtools_2.4.3
## [16] usethis_2.1.5 tictoc_1.1 rmarkdown_2.14 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2 XVector_0.34.0 fs_1.5.2 rstudioapi_0.13
## [6] remotes_2.4.2 bit64_4.0.5 AnnotationDbi_1.56.2 fansi_1.0.3 xml2_1.3.3
## [11] splines_4.1.0 R.methodsS3_1.8.1 cachem_1.0.6 geneplotter_1.72.0 knitr_1.41
## [16] pkgload_1.3.2 jsonlite_1.8.3 Rttf2pt1_1.3.10 annotate_1.72.0 dbplyr_2.1.1
## [21] png_0.1-7 R.oo_1.24.0 BiocManager_1.30.16 readr_2.1.2 compiler_4.1.0
## [26] httr_1.4.7 assertthat_0.2.1 Matrix_1.4-0 fastmap_1.1.0 cli_3.6.2
## [31] htmltools_0.5.3 prettyunits_1.1.1 tools_4.1.0 gtable_0.3.1 glue_1.6.2
## [36] GenomeInfoDbData_1.2.7 rappdirs_0.3.3 Rcpp_1.0.9 jquerylib_0.1.4 vctrs_0.4.1
## [41] Biostrings_2.62.0 extrafontdb_1.0 crosstalk_1.2.0 xfun_0.35 stringr_1.4.0
## [46] ps_1.7.0 rvest_1.0.2 lifecycle_1.0.1 XML_3.99-0.9 zlibbioc_1.40.0
## [51] scales_1.2.0 hms_1.1.2 RColorBrewer_1.1-3 yaml_2.3.6 curl_5.2.0
## [56] memoise_2.0.1 ggplot2_3.3.6 downloader_0.4 sass_0.4.1 biomaRt_2.50.3
## [61] stringi_1.7.8 RSQLite_2.2.13 genefilter_1.76.0 filelock_1.0.2 pkgbuild_1.3.1
## [66] BiocParallel_1.28.3 rlang_1.1.3 pkgconfig_2.0.3 bitops_1.0-7 evaluate_0.18
## [71] TCGAbiolinksGUI.data_1.14.1 lattice_0.20-45 purrr_0.3.4 htmlwidgets_1.5.4 bit_4.0.5
## [76] processx_3.8.0 tidyselect_1.1.2 plyr_1.8.7 magrittr_2.0.3 R6_2.5.1
## [81] generics_0.1.3 DelayedArray_0.20.0 DBI_1.1.2 pillar_1.8.0 survival_3.3-1
## [86] KEGGREST_1.34.0 RCurl_1.98-1.6 tibble_3.1.7 crayon_1.5.2 utf8_1.2.2
## [91] BiocFileCache_2.2.1 tzdb_0.3.0 progress_1.2.2 locfit_1.5-9.5 grid_4.1.0
## [96] data.table_1.14.2 blob_1.2.3 callr_3.7.3 digest_0.6.29 xtable_1.8-4
## [101] tidyr_1.2.0 R.utils_2.12.0 munsell_0.5.0 bslib_0.3.1 sessioninfo_1.2.2